# Load the attendance data and normalise the column names with
# janitor::clean_names() so later steps can refer to them in snake_case
# (e.g. `Park_Name` becomes `park_name`).
theme_park <- read_csv("ultimate data.csv") |>
  janitor::clean_names()
## Rows: 920 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Park_Name, City, Country, Type, Region
## dbl (2): Year, Attendance
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean and total attendance for each year and attraction type, excluding the
# "Worldwide" region rows. Rendered as a table rounded to 3 digits.
theme_park |>
  filter(region != "Worldwide") |>
  # Rescale before grouping: the division is row-wise, so it does not need the
  # groups. Attendance is expressed in units of 100,000 from here on.
  mutate(attendance = attendance / 100000) |>
  group_by(year, type) |>
  summarise(
    mean = mean(attendance),
    sum = sum(attendance),
    # Return an ungrouped result instead of one still grouped by `year`; this
    # also silences the "grouped output" message from summarise().
    .groups = "drop"
  ) |>
  knitr::kable(digits = 3)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
| year | type                 |    mean |     sum |
|-----:|:---------------------|--------:|--------:|
| 2019 | Amusement/Theme Park | 542.806 | 37996.4 |
| 2019 | Museum               | 335.013 | 20100.8 |
| 2019 | Water Park           |  98.315 |  5898.9 |
| 2020 | Amusement/Theme Park | 186.159 | 13031.1 |
| 2020 | Museum               |  77.742 |  4664.5 |
| 2020 | Water Park           |  38.558 |  2313.5 |
| 2021 | Amusement/Theme Park | 320.910 | 22463.7 |
| 2021 | Museum               | 107.650 |  6459.0 |
| 2021 | Water Park           |  57.892 |  3473.5 |
| 2022 | Amusement/Theme Park | 425.616 | 21280.8 |
| 2022 | Museum               | 193.388 | 11603.3 |
| 2022 | Water Park           |  77.972 |  4678.3 |
# Box plot of attendance with one box per year. `year` is converted to a
# factor first so that it is used as a categorical colour variable.
# NOTE(review): unlike the other summaries in this file, this pipeline does not
# drop the "Worldwide" region rows -- confirm that is intended.
theme_park |>
  mutate(year = as.factor(year)) |>
  group_by(year) |>
  plot_ly(
    type = "box",
    y = ~attendance,
    color = ~year,
    colors = "viridis"
  )
# Mean attendance per region and year, drawn as one marker per region-year
# combination and coloured by region. "Worldwide" rows are excluded.
theme_park |>
  filter(region != "Worldwide") |>
  group_by(region, year) |>
  # The summary is a mean, so the column is named accordingly (it was
  # previously called `attend_sum`); the name also becomes the y-axis label.
  summarise(attend_mean = mean(attendance), .groups = "drop") |>
  plot_ly(
    x = ~year,
    y = ~attend_mean,
    color = ~region,
    type = "scatter",
    # "markers" is the plotly scatter mode for points; "point" is not a
    # recognised mode value.
    mode = "markers",
    colors = "viridis"
  )
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.